# Fail early when the installed packages are older than the versions checked here.
stopifnot(packageVersion("quanteda") >= "1.5")
stopifnot(packageVersion("LSX") >= "0.7.5")
# library() stops with an error when a package is missing; require() would only
# return FALSE and let the script fail later with "could not find function".
library(quanteda)
library(quanteda.textstats)

# Five colours from the ColorBrewer "Set1" palette.
color <- RColorBrewer::brewer.pal(5, "Set1")

# Dictionaries read from YAML files located in the working directory.
dict_ja <- dictionary(file = "dictionary_ja.yml")
dict_he <- dictionary(file = "dictionary_he.yml")

# Tokenize Hebrew text and join statistically strong collocations into single
# tokens.
#
# corp: the object handed to quanteda's tokens() (presumably a corpus or a
#       character vector -- confirm against callers).
#
# Returns the compounded tokens object. As a side effect the selected
# collocations are saved as object `seqs` in "collocations_he.Rdata" in the
# working directory.
tokenize_hebrew <- function(corp) {

    toks <- tokens(corp, remove_punct = TRUE, remove_symbols = TRUE, remove_numbers = TRUE,
                   remove_url = TRUE, padding = TRUE)
    # Keep only tokens made entirely of letters and decimal digits; this drops
    # any symbols that survived tokens().
    toks <- tokens_select(toks, "^[\\p{Nd}\\p{L}]+$", valuetype = 'regex', case_insensitive = FALSE,
                          padding = TRUE)

    # Collocations are estimated on a copy with stop words removed, but are
    # applied to the full token stream below.
    min_count <- 500
    seqs <- toks %>%
      tokens_remove(stopwords("he", "marimo"), padding = TRUE) %>%
      textstat_collocations(min_count = min_count, method = "lambda", tolower = FALSE)
    # Explicit row index (same form as tokenize_japanese): a lone index on a
    # plain data.frame selects columns, not rows.
    seqs <- seqs[seqs$z > 3, ]
    toks <- tokens_compound(toks, seqs, concatenator = ' ', join = TRUE)
    save(seqs, file = "collocations_he.Rdata")
    return(toks)

}


# Tokenize Japanese text and join statistically strong collocations into
# single tokens (joined without a separator).
#
# corp: the object handed to quanteda's tokens().
#
# Returns the compounded tokens object. Side effect: the selected collocations
# are saved as object `seqs` in "collocations_ja.Rdata".
tokenize_japanese <- function(corp) {

    raw <- tokens(corp, remove_punct = TRUE, remove_symbols = TRUE, remove_numbers = TRUE,
                  remove_url = TRUE, padding = TRUE)
    # Keep tokens written only with full-width digits, hiragana, katakana
    # (including the long-vowel mark) and kanji.
    toks <- tokens_select(raw, "^[０-９ぁ-んァ-ヶー一-龠]+$", valuetype = 'regex', padding = TRUE)

    # Collocation candidates: stop words removed, then restricted to tokens
    # written only in katakana/kanji.
    candidates <- tokens_remove(toks, stopwords("ja", "marimo"), padding = TRUE)
    candidates <- tokens_select(candidates, '^[ァ-ヶー一-龠]+$', valuetype = 'regex', padding = TRUE)
    seqs <- textstat_collocations(candidates, min_count = 500, method = "lambda", tolower = FALSE)

    # Retain collocations with z above 3 and apply them to the full tokens.
    seqs <- seqs[seqs$z > 3,]
    compounded <- tokens_compound(toks, seqs, concatenator = '', join = TRUE)
    save(seqs, file = "collocations_ja.Rdata")
    compounded

}

# Re-segment tokens around the small "っ".
#
# Every token is first split at "っ" (the "っ" itself is kept as its own
# token); then each single-kanji token immediately followed by "っ" is joined
# back into one token with no separator.
#
# toks: a tokens object. Returns the adjusted tokens object.
fix_japanese <- function(toks) {
    pieces <- tokens_split(toks, "っ", valuetype = "fixed", remove_separator = FALSE)
    kanji_then_tsu <- list(c("^[一-龠]$", "^っ$"))
    tokens_compound(pieces, kanji_then_tsu, valuetype = "regex", concatenator = "")
}

# Plot a smoothed LSS score over time, optionally for selected publications.
#
# x:        data frame passed to LSX::smooth_lss(); must contain a
#           `publication` column when `select` is used.
# select:   NULL (use all rows) or the publication value(s) to keep.
# lss_var, span, from, to: forwarded unchanged to LSX::smooth_lss().
# col:      colour of the fitted line; the band uses the same colour at 20% alpha.
# add:      if TRUE draw onto the existing plot instead of opening a new one.
# se:       if TRUE also draw fit +/- 2 * se.fit.
#
# Called for its drawing side effects. Note that it sets par(mar) and does not
# restore it, so later calls with add = TRUE keep the same plot geometry.
plot_lss <- function(x, select = NULL, lss_var = "lss", span = 0.1, col = "black", add = FALSE, se = TRUE,
                     from = as.Date("2009-01-01"), to = as.Date("2018-12-31")) {
    par(mar = c(2.1, 4.1, 1.1, 1.1))
    if (!is.null(select)) {
        # %in% instead of ==: identical for a single value, and it does not
        # silently recycle when several publications are requested.
        x <- subset(x, publication %in% select)
    }
    pred <- LSX::smooth_lss(
        x, lss_var = lss_var, span = span,
        from = from, to = to
    )
    if (!add) {
        # Empty frame with a fixed [-1, 1] y-range, plus a zero reference line.
        plot(pred$date, pred$fit, col = rgb(0, 0, 0, 0.05), pch = 16, lwd = 2,
             ylim = c(-1, 1), xlab = "", ylab = "Threat emphasis", type = "n")
        abline(h = 0)
    }
    lines(pred$date, pred$fit, type = "l", col = col, lwd = 2)
    if (se) {
        band_col <- adjustcolor(col, 0.2)
        lines(pred$date, pred$fit + pred$se.fit * 2, type = "l", lwd = 2, col = band_col)
        lines(pred$date, pred$fit - pred$se.fit * 2, type = "l", lwd = 2, col = band_col)
    }
}

# Plot a smoothed score over time with configurable y-axis range and label.
#
# x:        data frame passed to LSX::smooth_lss().
# subset:   NULL (use all rows) or a logical row filter handed to subset().
# lss_var, span, from, to: forwarded unchanged to LSX::smooth_lss().
# col:      colour of the fitted line; the band uses the same colour at 20% alpha.
# ylim, ylab: y-axis limits and label of a newly opened plot.
# add:      if TRUE draw onto the existing plot instead of opening a new one.
# se:       if TRUE also draw fit +/- 2 * se.fit.
#
# Called for its drawing side effects; par(mar) is set and not restored.
plot_loess <- function(x,  subset = NULL, lss_var = "lss", span = 0.1, col = "black", 
                       ylim = c(-1, 1), ylab = "",
                       add = FALSE, se = TRUE,
                       from = as.Date("2009-01-01"), to = as.Date("2018-12-31")) {

    par(mar = c(2.1, 4.1, 1.1, 1.1))
    if (!is.null(subset)) {
        x <- subset(x, subset)
    }
    smoothed <- LSX::smooth_lss(x, lss_var = lss_var, span = span, from = from, to = to)

    if (!add) {
        # type = "n": set up the axes only, the curves are added below
        plot(smoothed$date, smoothed$fit, col = rgb(0, 0, 0, 0.05), pch = 16, lwd = 2,
             ylim = ylim, xlab = "", ylab = ylab, type = "n")
    }
    lines(smoothed$date, smoothed$fit, type = "l", col = col, lwd = 2)

    if (se) {
        half_width <- smoothed$se.fit * 2
        band_col <- adjustcolor(col, 0.2)
        # upper bound first, then lower bound
        for (side in c(1, -1)) {
            lines(smoothed$date, smoothed$fit + side * half_width, type = "l", lwd = 2, col = band_col)
        }
    }
}

# Plot the smoothed daily number of rows (documents) over a date window.
#
# x:        data frame with a `date` column (and a `publication` column when
#           `select` is used).
# select:   NULL (use all rows) or the publication value(s) to keep.
# m:        kernel dimension(s) for the Daniell kernel, see stats::kernel().
# add:      if TRUE draw onto the existing plot instead of opening a new one.
# ...:      further arguments passed to both plot() and lines().
# from, to: first and last day of the window. They come after `...`, so they
#           must be given by name; the defaults reproduce the previously
#           hard-coded window.
#
# Called for its drawing side effects; par(mar) is set and not restored.
plot_volume <- function(x, select = NULL, m = c(10, 10), add = FALSE, ...,
                        from = as.Date("2009-01-01"), to = as.Date("2018-12-31")) {

    date <- seq(as.Date(from), as.Date(to), by = "day")

    par(mar = c(2.1, 4.1, 1.1, 1.1))
    if (!is.null(select)) {
        # %in% instead of ==: identical for one value, safe for several
        x <- subset(x, publication %in% select)
    }
    # Count rows per day; the factor levels make days without rows count as 0
    # and days outside the window drop out.
    freq <- as.matrix(table(factor(as.character(x$date),
                                   levels = as.character(date))))
    # The row names (dates) of the smoothed series are used as x coordinates;
    # kernapply() shortens the series at both ends.
    smooth <- kernapply(freq, kernel("daniell", m))
    if (!add) {
        plot(as.Date(rownames(smooth)), smooth, lty = 1, type = "n", lwd = 2,
             ylab = "Volume", ...)
    }
    lines(as.Date(rownames(smooth)), smooth, lty = 1, type = "l", lwd = 2, ...)
}

# Annotate the current plot with event labels and (optionally) vertical lines.
#
# x:      named collection of event dates; the names are used as labels.
# lty:    line type of the vertical event lines.
# gap:    an event whose date is less than `gap` days after the preceding
#         event gets an empty label.
# margin: distance of the labels from the top of the plot region, as a
#         fraction of the plot height.
# line:   if TRUE draw a vertical line at every event date.
add_events <- function(x, lty = 3, gap = 90, margin = 0.03, line = TRUE) {

    event <- as.Date(unlist(x))
    names(event) <- names(x)
    event <- sort(event)

    # blank the label of any event that follows its predecessor too closely
    crowded <- c(FALSE, diff(event) < gap)
    names(event)[crowded] <- ""

    usr <- par("usr")
    label_y <- usr[4] - margin * (usr[4] - usr[3])
    text(event, label_y, labels = names(event), family = "Arial Narrow",
         srt = 90, adj = 0, pos = 2)

    if (line) {
        abline(v = event, lty = lty)
    }
}

# Add one column per event type marking the dates that fall in an event window.
#
# x:      data frame (or list) with a `date` element.
# y:      list with elements "election", "legislation", "military" and
#         "diplomacy", each holding the event dates of that type.
# window: window length, in the units of date arithmetic (days for Date).
# merge:  if TRUE the new columns are logical (inside any window); otherwise
#         they are factors of the event's position in date order, 0 = none.
#
# For elections and legislation the window covers the `window` days up to and
# including the event; for military and diplomacy it covers the event day and
# the `window` days after it. Where windows overlap the later event wins.
flag_events <- function(x, y, window, merge = TRUE) {
    days <- x$date
    before_event <- c("election", "legislation")
    after_event <- c("military", "diplomacy")

    for (kind in c(before_event, after_event)) {
        flag <- numeric(length(days))
        dates <- sort(unlist(y[[kind]]))
        for (k in seq_along(dates)) {
            when <- dates[[k]]
            if (is.character(when)) {
                when <- as.Date(when)
            }
            if (kind %in% before_event) {
                hit <- when - window <= days & days <= when
            } else {
                hit <- when <= days & days <= when + window
            }
            flag[hit] <- k
        }
        x[[kind]] <- if (merge) flag > 0 else factor(flag)
    }
    x
}

# Draw a strip chart showing, for every day from 2009-01-01 to 2018-12-31,
# which event windows are active (one row per event type).
#
# x:      event list passed to flag_events() as its `y` argument.
# window: window length passed to flag_events().
# ...:    further arguments passed to plot().
#
# par(mar, font.main) is set and not restored.
plot_dummy <- function(x, window, ...) {
    dummy <- flag_events(
        data.frame(date = seq.Date(as.Date("2009-01-01"), as.Date("2018-12-31"), by = "1 day")),
        x, window, merge = FALSE
    )
    par(mar = c(2, 6, 3, 1), font.main = 1)
    plot(dummy$date, rep(1, nrow(dummy)), type = "n", ylim = c(0.5, 4.5), yaxt = "n", ylab = "", ...)

    # one row of squares per event type; days outside any window get colour NA
    rows <- c("election", "legislation", "military", "diplomacy")
    for (k in seq_along(rows)) {
        active <- dummy[[rows[k]]] != 0
        points(dummy$date, rep(k, nrow(dummy)), col = ifelse(active, 1, NA), pch = 15)
    }
    axis(2, 1:4, rows, las = 2)

}

# Plot predicted values with lower/upper bounds against the levels of an
# event factor.
#
# x:     data frame with columns `fit`, `lwr`, `upr` and the factor column
#        named by `var`.
# col:   colour of the fitted line; the bounds use the same colour at 20% alpha.
# var:   name of the factor column used for the x axis.
# label: word used in the x-axis title; its first character prefixes the tick
#        labels.
# add:   if TRUE draw onto the existing plot instead of opening a new one.
# ...:   further arguments passed to plot().
#
# par(mar) is set and not restored.
plot_prediction <- function(x, col = "black", var = "legislation", label = "Legislative",
                            add = FALSE, ...) {

    par(mar = c(4.1, 4.1, 4.1, 1.1))
    if (!add) {
        plot(as.integer(x[[var]]), x$fit, type = "n", font.main = 1, ...,
             xaxt = "n", ylab = "Threat emphasis (predicted)", xlab = paste(label, "events"))
        # The points are drawn at the factor's integer codes (1..k), so the
        # ticks must sit at those codes as well. Using the level labels as
        # positions is only right when the levels are exactly "1".."k".
        lev <- levels(x[[var]])
        axis(1, seq_along(lev), paste0(substr(label, 1, 1), lev))
    }
    band_col <- adjustcolor(col, 0.2)
    lines(x[[var]], x$fit, type = "b", lwd = 2, col = col)
    lines(x[[var]], x$lwr, type = "l", lwd = 2, col = band_col)
    lines(x[[var]], x$upr, type = "l", lwd = 2, col = band_col)

}

# Build a chronological table of events with a per-type running number.
#
# x: list with (some of) the elements "military", "diplomacy", "election" and
#    "legislation"; each element holds the named dates of that type's events.
#
# Returns a data frame with columns `date`, `event` and one character column
# per type. Rows are sorted by date. In the column of its own type each event
# carries its running number within that type ("1", "2", ...); all other type
# columns hold "".
#
# The running number is computed directly instead of via a contingency table,
# so a type without events or an input with a single event no longer fails.
table_event <-  function (x) {

    type_include <- c("military", "diplomacy", "election", "legislation")
    x <- x[type_include]
    type <- rep(type_include, lengths(x))
    names(x) <- NULL  # keep unlist() from prefixing event names with the type

    date <- unlist(x)
    if (is.null(date)) {
        date <- character(0)
    }
    event <- names(date)
    if (is.null(event)) {
        event <- rep("", length(date))
    }

    chrono <- order(date)
    type <- type[chrono]
    result <- data.frame(date = unname(date)[chrono],
                         event = event[chrono],
                         stringsAsFactors = FALSE)
    for (kind in type_include) {
        own <- type == kind
        number <- rep("", length(type))
        number[own] <- seq_len(sum(own))  # coerced to "1", "2", ...
        result[[kind]] <- number
    }
    return(result)
}
